!pip install plotly
Requirement already satisfied: plotly in c:\users\user\anaconda3\lib\site-packages (5.10.0) Requirement already satisfied: tenacity>=6.2.0 in c:\users\user\anaconda3\lib\site-packages (from plotly) (8.0.1)
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
# Project directory on the author's machine; wine.csv is read from its
# data\ sub-folder.
path = "C:\\Users\\user\\ict_class\\kaggle4th_flask_ml\\wine_flask_test\\"

# The backslash is escaped explicitly: "\w" is not a recognised escape
# sequence, so the unescaped form only worked by accident and triggers an
# invalid-escape warning on current Python versions. The resulting path
# string is unchanged.
df = pd.read_csv(path + "data\\wine.csv")

# Bare expression so the notebook renders the loaded frame.
df
| | fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | Id |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 | 0 |
| 1 | 7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.99680 | 3.20 | 0.68 | 9.8 | 5 | 1 |
| 2 | 7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.99700 | 3.26 | 0.65 | 9.8 | 5 | 2 |
| 3 | 11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.99800 | 3.16 | 0.58 | 9.8 | 6 | 3 |
| 4 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 | 4 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1138 | 6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29.0 | 40.0 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 | 1592 |
| 1139 | 6.8 | 0.620 | 0.08 | 1.9 | 0.068 | 28.0 | 38.0 | 0.99651 | 3.42 | 0.82 | 9.5 | 6 | 1593 |
| 1140 | 6.2 | 0.600 | 0.08 | 2.0 | 0.090 | 32.0 | 44.0 | 0.99490 | 3.45 | 0.58 | 10.5 | 5 | 1594 |
| 1141 | 5.9 | 0.550 | 0.10 | 2.2 | 0.062 | 39.0 | 51.0 | 0.99512 | 3.52 | 0.76 | 11.2 | 6 | 1595 |
| 1142 | 5.9 | 0.645 | 0.12 | 2.0 | 0.075 | 32.0 | 44.0 | 0.99547 | 3.57 | 0.71 | 10.2 | 5 | 1597 |
1143 rows × 13 columns
| column 명 | 뜻 |
|---|---|
| fixed acidity | 고정산 |
| volatile acidity | 휘발성 산 |
| citric acid | 구연산 |
| residual sugar | 잔당 |
| chlorides | 염화물 |
| free sulfur dioxide | 자유 이산화황 |
| total sulfur dioxide | 전체 이산화황 |
| density | 밀도 |
| pH | 산도 |
| sulphates | 황산염 |
| alcohol | 알코올 |
| quality (score between 0 and 10) | 품질 |
# Summary statistics for every column, skipping the first row of
# describe() and transposing so each column of df becomes one table row;
# cell backgrounds are shaded with the 'Reds' colormap.
stats_without_first_row = df.describe().iloc[1:]
stats_without_first_row.T.style.background_gradient(cmap='Reds')
| | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|
| fixed acidity | 8.311111 | 1.747595 | 4.600000 | 7.100000 | 7.900000 | 9.100000 | 15.900000 |
| volatile acidity | 0.531339 | 0.179633 | 0.120000 | 0.392500 | 0.520000 | 0.640000 | 1.580000 |
| citric acid | 0.268364 | 0.196686 | 0.000000 | 0.090000 | 0.250000 | 0.420000 | 1.000000 |
| residual sugar | 2.532152 | 1.355917 | 0.900000 | 1.900000 | 2.200000 | 2.600000 | 15.500000 |
| chlorides | 0.086933 | 0.047267 | 0.012000 | 0.070000 | 0.079000 | 0.090000 | 0.611000 |
| free sulfur dioxide | 15.615486 | 10.250486 | 1.000000 | 7.000000 | 13.000000 | 21.000000 | 68.000000 |
| total sulfur dioxide | 45.914698 | 32.782130 | 6.000000 | 21.000000 | 37.000000 | 61.000000 | 289.000000 |
| density | 0.996730 | 0.001925 | 0.990070 | 0.995570 | 0.996680 | 0.997845 | 1.003690 |
| pH | 3.311015 | 0.156664 | 2.740000 | 3.205000 | 3.310000 | 3.400000 | 4.010000 |
| sulphates | 0.657708 | 0.170399 | 0.330000 | 0.550000 | 0.620000 | 0.730000 | 2.000000 |
| alcohol | 10.442111 | 1.082196 | 8.400000 | 9.500000 | 10.200000 | 11.100000 | 14.900000 |
| quality | 5.657043 | 0.805824 | 3.000000 | 5.000000 | 6.000000 | 6.000000 | 8.000000 |
| Id | 804.969379 | 463.997116 | 0.000000 | 411.000000 | 794.000000 | 1209.500000 | 1597.000000 |
# 6x2 grid of panels: the first one counts the rows per quality score,
# each of the remaining eleven is a box plot of one column grouped by
# quality.
fig, ax = plt.subplots(nrows=6, ncols=2, figsize=(15, 30))

# Flatten row by row so panels[1] is ax[0][1], panels[2] is ax[1][0], etc.
panels = ax.ravel()

sns.countplot(x=df['quality'], ax=panels[0]).set_title('Target Distribution', size=15)

# Listed in the order the panels are filled (left to right, top to bottom).
boxplot_features = [
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'fixed acidity',
]
for panel, feature in zip(panels[1:], boxplot_features):
    sns.boxplot(x=df['quality'], y=df[feature], ax=panel)
<AxesSubplot:xlabel='quality', ylabel='fixed acidity'>
# Annotated heatmap of the pairwise correlations between all columns of df.
plt.figure(figsize=(15, 10))
corr = df.corr()
heatmap_axes = sns.heatmap(
    data=corr,
    cmap="Blues",
    annot=True,
    annot_kws={"fontsize": 13},
)
# heatmap() draws on the current axes and returns them, so the title goes
# on the same axes the heatmap was drawn into.
heatmap_axes.set_title("Correlation")
Text(0.5, 1.0, 'Correlation')
# Pairwise scatter plots (lower triangle only) for a subset of columns,
# with points coloured by quality. The same columns are used on both axes.
pair_features = [
    'density',
    'alcohol',
    'pH',
    'volatile acidity',
    'citric acid',
    'sulphates',
    'fixed acidity',
]
sns.pairplot(
    data=df,
    hue='quality',
    x_vars=pair_features,
    y_vars=pair_features,
    corner=True,
)
<seaborn.axisgrid.PairGrid at 0x25d96656ee0>
# One histogram per column of df; 'auto' lets the bin count be chosen
# per column instead of being fixed.
histogram_options = {
    "figsize": (20, 15),
    "bins": 'auto',
    "color": '#6B7FFF',
}
df.hist(**histogram_options)
plt.show()
import plotly.express as px
# Interactive scatter of total vs. free sulfur dioxide, with each point
# shaded by its quality score on the 'Greens' scale.
fig = px.scatter(
    df,
    x="total sulfur dioxide",
    y="free sulfur dioxide",
    color="quality",
    color_continuous_scale='Greens',
)

# quality is numeric, so plotly draws a continuous colour bar rather than
# a discrete legend; setting legend_title_text therefore had no visible
# effect. The title has to be set on the colour bar instead.
fig.update_layout(coloraxis_colorbar_title_text='Quality')